import json
import os
import pprint
import asyncio
from datetime import datetime
from time import sleep
from tqdm import tqdm
import argparse
from collections import Counter
from distutils.util import strtobool
import multiprocessing as mp


from src.evol.data_utils import load_data
from src.evol.openai_backend import call_chatgpt, LLM
from src.utils.data_utils import extract_answer_math, extract_answer_number
from src.utils.code_utils import execute_tora, execute_code_interactive
from src.utils.math_utils import compare_ans, vote


def parse_args():
    """Build the command-line parser and return the parsed namespace.

    ``--verbose`` is a boolean flag; every other option takes a value that
    is converted with the listed type and falls back to the listed default.
    """
    # (flag, default, type) in the order the options appear in ``--help``.
    valued_options = (
        ("--prompt_path", None, str),
        ("--strategy_path", None, str),
        ("--dataset", "gsm", str),
        ("--data_path", None, str),
        ("--model", "gpt-3.5-turbo", str),
        ("--temperature", 0.0, float),
        ("--top_p", 1.0, float),
        ("--max_tokens", 1024, int),
        ("--num_seqs", 1, int),
        ("--num_skips", 0, int),
        ("--input_col", "question", str),
        ("--output_col", "answer", str),
        ("--max_iter", 3, int),
        ("--num_process", 1, int),
        ("--output_path", None, str),
        ("--batch_size", 10, int),
    )

    cli = argparse.ArgumentParser()
    cli.add_argument("--verbose", action="store_true")
    for flag, fallback, caster in valued_options:
        cli.add_argument(flag, default=fallback, type=caster)
    return cli.parse_args()


def load_prompt(prompt_path):
    """Return the UTF-8 text stored at ``prompt_path`` with outer whitespace removed."""
    with open(prompt_path, mode="r", encoding="utf-8") as prompt_file:
        return prompt_file.read().strip()


def stop_tora(result):
    """Tell whether ``result`` contains a ``\\boxed`` marker.

    The generation loop treats a chunk containing this marker as finished.
    """
    return "\\boxed" in result


def clean_strategy(strategy):
    """Extract the bare strategy text from a raw completion.

    Keeps what follows the last ``## Strategy`` heading (or the whole text
    when the heading is absent), then cuts at the next ``##`` marker.
    Surrounding whitespace is stripped after each step.
    """
    after_heading = strategy.rpartition("## Strategy")[2].strip()
    before_next_section = after_heading.partition("##")[0]
    return before_next_section.strip()


def parse_output(output):
    """Split a completion into ``(strategy, solution)``.

    The text must contain the ``## Solution`` heading exactly once;
    otherwise ``(None, None)`` is returned. The part before the heading is
    passed through ``clean_strategy`` and the part after it is stripped.
    """
    heading = "## Solution"
    if output.count(heading) != 1:
        return None, None
    raw_strategy, _, raw_solution = output.partition(heading)
    return clean_strategy(raw_strategy), raw_solution.strip()


def get_batch_strategies(llm, batch_messages):
    """Ask the LLM for solution strategies for a batch of conversations.

    Reads the generation settings (``model``, ``max_tokens``,
    ``temperature``, ``num_seqs``) from the module-level ``args`` namespace.

    Args:
        llm: Client exposing an awaitable ``achat(...)`` method.
        batch_messages: One chat-message list per sample.

    Returns:
        One list per conversation holding its distinct cleaned strategies,
        in the order they were first produced.
    """
    batch_outputs = asyncio.run(
        llm.achat(
            batch_messages,
            model=args.model,
            # Generation halts at the next markdown heading after the strategy.
            stop=["\n##"],
            max_tokens=args.max_tokens,
            temperature=args.temperature,
            num_beams=args.num_seqs,
        )
    )
    # dict.fromkeys drops duplicates while keeping first-seen order; going
    # through a set would make the order depend on string hashing and thus
    # differ between runs.
    return [
        list(dict.fromkeys(clean_strategy(s) for s in strategies))
        for strategies in batch_outputs
    ]


def _tora_chat(llm, batch_messages):
    """Run one deterministic completion round for every conversation given."""
    return asyncio.run(
        llm.achat(
            batch_messages,
            model=args.model,
            # Stop before the model writes an output block itself, or at a
            # "---" separator.
            stop=["```output", "---"],
            max_tokens=args.max_tokens,
            temperature=0,
            num_beams=1,
        )
    )


def batch_tora(llm, batch_messages_base):
    """Iteratively generate solutions, feeding code results back to the model.

    Each round sends every unfinished conversation to the LLM, appends the
    first returned completion to that conversation's transcript and, unless
    the completion contains ``\\boxed`` (see ``stop_tora``), passes it to
    ``execute_code_interactive``. A non-empty result is appended as an
    ``output`` code fence and the conversation is queued for another round.
    The number of rounds is bounded by ``args.max_iter`` (module-level
    ``args``), which defaults to 3.

    Args:
        llm: Client exposing an awaitable ``achat(...)`` method.
        batch_messages_base: For each sample, a list with one conversation
            per strategy; each conversation is ``[system_msg, user_msg]``.

    Returns:
        A nested list with the same sample/strategy layout holding the
        accumulated transcript text for every conversation.
    """
    batch_full_outputs = [["" for _ in messages] for messages in batch_messages_base]
    # (sample index, strategy index) of every conversation still running;
    # the first round covers all of them in flattened order.
    remain_ids = [
        (i, j)
        for i, messages in enumerate(batch_messages_base)
        for j in range(len(messages))
    ]
    if not remain_ids:
        # Nothing to generate, so skip the API round-trip entirely.
        return batch_full_outputs
    batch_messages = [batch_messages_base[i][j] for i, j in remain_ids]

    for _ in range(args.max_iter):
        raw_outputs = _tora_chat(llm, batch_messages)
        # Scatter the flat results back into the sample/strategy grid; slots
        # that were not queried this round stay None.
        batch_outputs = [[None for _ in outputs] for outputs in batch_full_outputs]
        for outputs, (i, j) in zip(raw_outputs, remain_ids):
            batch_outputs[i][j] = outputs[0]
        print(batch_outputs)
        if batch_outputs:
            print(len(batch_outputs), len(batch_outputs[0]))
            print(len(batch_full_outputs), len(batch_full_outputs[0]))

        remain_ids = []
        for i, outputs in enumerate(batch_outputs):
            for j, output in enumerate(outputs):
                if output is None:
                    continue
                batch_full_outputs[i][j] += output
                if stop_tora(output):
                    continue
                code_output = execute_code_interactive(output)
                print("code output", code_output)
                if len(code_output) > 0:
                    batch_full_outputs[i][j] += f"```output\n{code_output}\n```\n"
                remain_ids.append((i, j))
        if not remain_ids:
            break

        # Next prompt = original user prompt + everything produced so far.
        user_messages = [
            batch_messages_base[i][j][1]["content"] + batch_full_outputs[i][j]
            for i, j in remain_ids
        ]
        print(user_messages)
        batch_messages = [
            [
                {
                    "role": "system",
                    "content": "You are a helpful expert for math problem solving.",
                },
                {
                    "role": "user",
                    "content": user_message,
                },
            ]
            for user_message in user_messages
        ]
    return batch_full_outputs


def main(args, samples, idx):
    """Generate strategies, solve every sample with ToRA, then score and save.

    For each batch the strategy prompt is sent first; every returned
    strategy is then combined with the question in the solving prompt and
    run through ``batch_tora``. A sample scores 1 when ``compare_ans``
    accepts any of its predicted answers against ``sample["answer"]``.
    Each processed sample is written to the output file as it is finished.

    Args:
        args: Parsed command-line namespace (see ``parse_args``).
        samples: List of sample dicts providing ``"question"`` and
            ``"answer"``; each dict is extended in place with the results.
        idx: Worker index. ``-1`` means single-process mode; any other
            value is appended to the output file name as ``_{idx}``.
    """
    # load prompt
    strategy_prompt = load_prompt(args.strategy_path)
    prompt = load_prompt(args.prompt_path)
    if idx <= 0:
        # Only the single-process run (-1) or the first worker (0) echoes
        # the prompts.
        print(prompt)
        print(strategy_prompt)
    os.makedirs(f"result/{args.model}/{args.dataset}", exist_ok=True)
    if args.output_path is None:
        output_path = f"result/{args.model}/{args.dataset}/t{args.temperature}_n{args.num_seqs}-train_specific.jsonl"
    else:
        output_path = args.output_path
    print("%" * 30, "Tora", "%" * 30)
    print("Start PID %d and save to %s" % (os.getpid(), output_path))

    if idx != -1:
        output_path = output_path.replace(".jsonl", f"_{idx}.jsonl")
    scores = []
    samples = samples[args.num_skips :]
    if not samples:
        # An empty split (or skipping past the end) leaves nothing to do;
        # return before touching the output file.
        print(f"No samples to process for worker {idx}; skipping.")
        return
    llm = LLM()
    batch_size = args.batch_size
    print(samples[0]["question"])
    # Append when resuming with --num_skips so earlier results are kept.
    # The encoding is explicit because records are dumped with
    # ensure_ascii=False.
    file_mode = "w" if args.num_skips == 0 else "a"
    with open(output_path, file_mode, encoding="utf-8") as f:
        for start in tqdm(range(0, len(samples), batch_size)):
            batch_samples = samples[start : start + batch_size]
            batch_messages = [
                [
                    {
                        "role": "system",
                        "content": "You are a helpful expert for math problem solving.",
                    },
                    {
                        "role": "user",
                        "content": strategy_prompt.replace("{question}", s["question"]),
                    },
                ]
                for s in batch_samples
            ]
            batch_strategies = get_batch_strategies(llm, batch_messages)
            print(batch_strategies)
            # One conversation per (sample, strategy) pair.
            batch_messages = [
                [
                    [
                        {
                            "role": "system",
                            "content": "You are a helpful expert for math problem solving.",
                        },
                        {
                            "role": "user",
                            "content": prompt.replace(
                                "{question}", s["question"]
                            ).replace("{strategy}", strategy),
                        },
                    ]
                    for strategy in strategies
                ]
                for s, strategies in zip(batch_samples, batch_strategies)
            ]
            batch_outputs = batch_tora(llm, batch_messages)
            print(batch_outputs)
            for s, strategies, outputs in zip(
                batch_samples, batch_strategies, batch_outputs
            ):
                # Separate accumulator names: reusing ``strategies`` here
                # would empty the list being iterated, so nothing would be
                # scored.
                used_strategies, solutions, pred_anss = [], [], []
                for strategy, o in zip(strategies, outputs):
                    # The whole transcript is taken as the solution text.
                    solution = o.strip()
                    used_strategies.append(strategy)
                    solutions.append(solution)
                    pred_anss.append(extract_answer_math(solution))
                label_ans = s["answer"]
                score = int(any(compare_ans(p, label_ans) for p in pred_anss))
                scores.append(score)
                save_sample = s
                save_sample["generation"] = outputs
                save_sample["strategy"] = used_strategies
                save_sample["pred_answers"] = pred_anss
                save_sample["label_answer"] = label_ans
                save_sample["score"] = score
                # NOTE: indent=4 makes each record span several lines, so the
                # file is a stream of pretty-printed objects rather than one
                # object per line.
                f.write(json.dumps(save_sample, ensure_ascii=False, indent=4) + "\n")
                f.flush()
    if scores:
        print(f"Accuracy - {sum(scores) / len(scores)}")
    else:
        print("Accuracy - n/a (no samples were scored)")


if __name__ == "__main__":
    # Entry point: run everything in this process, or split the samples
    # into contiguous chunks and hand one chunk to each pool worker.
    args = parse_args()
    samples = load_data(args.dataset, args.data_path)
    if args.num_process == 1:
        main(args, samples, idx=-1)
    else:
        num_each_split = len(samples) // args.num_process
        p = mp.Pool(args.num_process)
        pending = []
        for idx in range(args.num_process):
            start = idx * num_each_split
            if idx == args.num_process - 1:
                # The last worker also takes the remainder of the division.
                end = len(samples)
            else:
                end = start + num_each_split
            split_data = samples[start:end]
            pending.append(
                p.apply_async(
                    main,
                    args=(
                        args,
                        split_data,
                        idx,
                    ),
                )
            )
        p.close()
        p.join()
        # apply_async stores a worker's exception in its result object;
        # get() re-raises it here so a failed worker is not silently ignored.
        for result in pending:
            result.get()
        print("All of the child processes over!")
